import sys
# make category map
#
# Usage: <input> <output> [level=4]
#
# Each non-blank input line must hold at least two whitespace-separated
# fields.  The second field is a dot-separated label; it is truncated to its
# first `level` components and every distinct truncated label is given a
# dense integer id in order of first appearance.  One line
# "<field0>\t<id>\t<truncated label>" is written to the output per input line.


def make_category_map(lines, out, nlev=4):
    """Assign dense ids to truncated dotted labels and write the mapping.

    Args:
        lines: iterable of text lines (e.g. an open file).  Blank lines are
            skipped; every other line needs at least two
            whitespace-separated fields.
        out: object with a ``write(str)`` method receiving one
            ``'<field0>\\t<id>\\t<key>\\n'`` record per processed line.
        nlev: number of leading dot-separated components of the second
            field that form the key.

    Returns:
        ``(ncnt, catmap)``: the number of records written and the dict
        mapping each key to its integer id (ids start at 0 and follow
        first-appearance order).

    Raises:
        ValueError: if a non-blank line has fewer than two fields.
    """
    catmap = {}
    ncnt = 0
    for lineno, line in enumerate(lines, 1):
        arr = line.split()
        if not arr:
            # Blank lines (typically a trailing newline at end of file)
            # carry no record; skip them instead of failing on arr[1].
            continue
        if len(arr) < 2:
            raise ValueError(
                'line %d: expected at least 2 fields, got %d' % (lineno, len(arr))
            )
        key = '.'.join(arr[1].split('.')[:nlev])
        # len(catmap) is evaluated before the insert, so a new key gets the
        # next unused id and an existing key keeps the id it already has.
        cid = catmap.setdefault(key, len(catmap))
        ncnt += 1
        out.write('%s\t%d\t%s\n' % (arr[0], cid, key))
    return ncnt, catmap


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    Args:
        argv: argument vector laid out like ``sys.argv`` (program name
            first).  Defaults to ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print('Usage:<input> <output> [level=4]')
        return 0

    nlev = int(argv[3]) if len(argv) > 3 else 4

    # Input is opened first so a missing input file does not truncate an
    # existing output file; `with` closes both even if processing fails.
    with open(argv[1], 'r') as fi:
        with open(argv[2], 'w') as fo:
            ncnt, catmap = make_category_map(fi, fo, nlev)

    ncat = len(catmap)
    # An empty input yields no categories; report 0 instead of dividing by 0.
    ratio = float(ncnt) / ncat if ncat else 0.0
    print('level=%d, %d/%d = %f in all' % (nlev, ncnt, ncat, ratio))
    return 0


if __name__ == '__main__':
    sys.exit(main())

